import os
import random

import pandas as pd

# Root directory of the dataset. main() treats each subdirectory found here as
# one class and also writes its output files (CSV lists, labels.txt) here.
base_dir = r'H:\notebook\ResNet\flower_photos'
# Fraction of each class's files assigned to the validation split.
val_ratio = 0.2
# Fraction of each class's files assigned to the test split.
test_ratio = 0.1
# Class names in order of first appearance; a name's position in this list is
# the integer label written to the CSV files. Filled in by main().
labels = []


def main():
    """Split every class folder under ``base_dir`` into train/val/test lists.

    Each subdirectory of ``base_dir`` is treated as one class. Its files are
    taken in the order returned by ``os.listdir`` and cut into three
    consecutive, non-overlapping slices: the first ``val_ratio`` share goes to
    validation, the next ``test_ratio`` share to test, and the remainder to
    training. Class names are appended to the module-level ``labels`` list on
    first sight, and a class's index in that list is its integer label.

    Side effects:
        Writes ``train.csv``, ``test.csv`` and ``val.csv`` (columns ``path``
        and ``label``, plus the default pandas index column) and
        ``labels.txt`` (one class name per line) into ``base_dir``.
    """
    train = []
    val = []
    test = []
    for name in os.listdir(base_dir):
        class_dir = os.path.join(base_dir, name)
        if not os.path.isdir(class_dir):
            # Plain files in the root (e.g. previously written CSVs) are not
            # classes.
            continue

        files = os.listdir(class_dir)
        val_size = int(len(files) * val_ratio)
        test_size = int(len(files) * test_ratio)
        held_out = val_size + test_size

        if name not in labels:
            labels.append(name)
        label = labels.index(name)

        samples = [(os.path.join(class_dir, fname), label) for fname in files]
        val.extend(samples[:val_size])
        test.extend(samples[val_size:held_out])
        # Training data starts after BOTH held-out slices so that no file
        # appears in more than one split.
        train.extend(samples[held_out:])

    random.shuffle(train)
    random.shuffle(test)
    random.shuffle(val)

    columns = ['path', 'label']
    for rows, csv_name in ((train, 'train.csv'),
                           (test, 'test.csv'),
                           (val, 'val.csv')):
        frame = pd.DataFrame(rows, columns=columns)
        frame.to_csv(os.path.join(base_dir, csv_name))

    with open(os.path.join(base_dir, 'labels.txt'), 'w') as f:
        for class_name in labels:
            f.write("{}\n".format(class_name))


# Run the split only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
